import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
# Load the raw observation table from the CSV file (path is relative to the
# notebook's working directory) into a DataFrame.
df = pd.read_csv("covid_19_data.csv")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-1-3575dc49b97a> in <module> ----> 1 df = pd.read_csv("covid_19_data.csv") NameError: name 'pd' is not defined
# Preview the first 50 rows of the freshly loaded table.
df.head(50)
| | SNo | ObservationDate | Province/State | Country/Region | Last Update | Confirmed | Deaths | Recovered |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 01/22/2020 | Anhui | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 1 | 2 | 01/22/2020 | Beijing | Mainland China | 1/22/2020 17:00 | 14.0 | 0.0 | 0.0 |
| 2 | 3 | 01/22/2020 | Chongqing | Mainland China | 1/22/2020 17:00 | 6.0 | 0.0 | 0.0 |
| 3 | 4 | 01/22/2020 | Fujian | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 4 | 5 | 01/22/2020 | Gansu | Mainland China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 5 | 6 | 01/22/2020 | Guangdong | Mainland China | 1/22/2020 17:00 | 26.0 | 0.0 | 0.0 |
| 6 | 7 | 01/22/2020 | Guangxi | Mainland China | 1/22/2020 17:00 | 2.0 | 0.0 | 0.0 |
| 7 | 8 | 01/22/2020 | Guizhou | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 8 | 9 | 01/22/2020 | Hainan | Mainland China | 1/22/2020 17:00 | 4.0 | 0.0 | 0.0 |
| 9 | 10 | 01/22/2020 | Hebei | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 10 | 11 | 01/22/2020 | Heilongjiang | Mainland China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 11 | 12 | 01/22/2020 | Henan | Mainland China | 1/22/2020 17:00 | 5.0 | 0.0 | 0.0 |
| 12 | 13 | 01/22/2020 | Hong Kong | Hong Kong | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 13 | 14 | 01/22/2020 | Hubei | Mainland China | 1/22/2020 17:00 | 444.0 | 17.0 | 28.0 |
| 14 | 15 | 01/22/2020 | Hunan | Mainland China | 1/22/2020 17:00 | 4.0 | 0.0 | 0.0 |
| 15 | 16 | 01/22/2020 | Inner Mongolia | Mainland China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 16 | 17 | 01/22/2020 | Jiangsu | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 17 | 18 | 01/22/2020 | Jiangxi | Mainland China | 1/22/2020 17:00 | 2.0 | 0.0 | 0.0 |
| 18 | 19 | 01/22/2020 | Jilin | Mainland China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 19 | 20 | 01/22/2020 | Liaoning | Mainland China | 1/22/2020 17:00 | 2.0 | 0.0 | 0.0 |
| 20 | 21 | 01/22/2020 | Macau | Macau | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 21 | 22 | 01/22/2020 | Ningxia | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 22 | 23 | 01/22/2020 | Qinghai | Mainland China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 23 | 24 | 01/22/2020 | Shaanxi | Mainland China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 24 | 25 | 01/22/2020 | Shandong | Mainland China | 1/22/2020 17:00 | 2.0 | 0.0 | 0.0 |
| 25 | 26 | 01/22/2020 | Shanghai | Mainland China | 1/22/2020 17:00 | 9.0 | 0.0 | 0.0 |
| 26 | 27 | 01/22/2020 | Shanxi | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 27 | 28 | 01/22/2020 | Sichuan | Mainland China | 1/22/2020 17:00 | 5.0 | 0.0 | 0.0 |
| 28 | 29 | 01/22/2020 | Taiwan | Taiwan | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 29 | 30 | 01/22/2020 | Tianjin | Mainland China | 1/22/2020 17:00 | 4.0 | 0.0 | 0.0 |
| 30 | 31 | 01/22/2020 | Tibet | Mainland China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 31 | 32 | 01/22/2020 | Washington | US | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 32 | 33 | 01/22/2020 | Xinjiang | Mainland China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 33 | 34 | 01/22/2020 | Yunnan | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 34 | 35 | 01/22/2020 | Zhejiang | Mainland China | 1/22/2020 17:00 | 10.0 | 0.0 | 0.0 |
| 35 | 36 | 01/22/2020 | NaN | Japan | 1/22/2020 17:00 | 2.0 | 0.0 | 0.0 |
| 36 | 37 | 01/22/2020 | NaN | Thailand | 1/22/2020 17:00 | 4.0 | 0.0 | 2.0 |
| 37 | 38 | 01/22/2020 | NaN | South Korea | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 38 | 39 | 01/22/2020 | Unknown | China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 39 | 40 | 01/22/2020 | NaN | Kiribati | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 40 | 41 | 01/23/2020 | Anhui | Mainland China | 1/23/20 17:00 | 9.0 | 0.0 | 0.0 |
| 41 | 42 | 01/23/2020 | Beijing | Mainland China | 1/23/20 17:00 | 22.0 | 0.0 | 0.0 |
| 42 | 43 | 01/23/2020 | Chongqing | Mainland China | 1/23/20 17:00 | 9.0 | 0.0 | 0.0 |
| 43 | 44 | 01/23/2020 | Fujian | Mainland China | 1/23/20 17:00 | 5.0 | 0.0 | 0.0 |
| 44 | 45 | 01/23/2020 | Gansu | Mainland China | 1/23/20 17:00 | 2.0 | 0.0 | 0.0 |
| 45 | 46 | 01/23/2020 | Guangdong | Mainland China | 1/23/20 17:00 | 32.0 | 0.0 | 2.0 |
| 46 | 47 | 01/23/2020 | Guangxi | Mainland China | 1/23/20 17:00 | 5.0 | 0.0 | 0.0 |
| 47 | 48 | 01/23/2020 | Guizhou | Mainland China | 1/23/20 17:00 | 3.0 | 0.0 | 0.0 |
| 48 | 49 | 01/23/2020 | Hainan | Mainland China | 1/23/20 17:00 | 5.0 | 0.0 | 0.0 |
| 49 | 50 | 01/23/2020 | Hubei | Mainland China | 1/23/20 17:00 | 444.0 | 17.0 | 28.0 |
# --- Clean the raw table and aggregate it per country and day ---------------
# Drop the columns the analysis never uses and shorten the remaining names.
df.drop(['SNo', 'Last Update'], axis=1, inplace=True)
df.rename(
    columns={
        'ObservationDate': 'Date',
        'Province/State': 'State',
        'Country/Region': 'Country',
    },
    inplace=True,
)
df['Date'] = pd.to_datetime(df['Date'])

# Fill gaps column by column with pandas rather than pushing the whole
# mixed-type frame through SimpleImputer: the imputer returns a single object
# array, so the rebuilt frame loses its numeric/datetime dtypes, and its
# default constant for object data is the string 'missing_value', which would
# also land in the numeric columns and break the sums below.
count_columns = ['Confirmed', 'Deaths', 'Recovered']
df2 = df.copy()
df2[['State', 'Country']] = df2[['State', 'Country']].fillna('missing_value')
df2[count_columns] = df2[count_columns].fillna(0)

# Some country names carry stray whitespace (e.g. ' Azerbaijan'), which would
# otherwise split one country into two groups.
df2['Country'] = df2['Country'].str.strip()

# Sum the state-level rows into one row per (Country, Date). Only the value
# columns are selected: the group keys come back via reset_index(), and
# listing them in the selection as well makes them clash with that step.
df3 = df2.groupby(['Country', 'Date'])[count_columns].sum().reset_index()
df3.head(20)
| | Country | Date | Confirmed | Deaths | Recovered |
|---|---|---|---|---|---|
| 0 | Azerbaijan | 2020-02-28 | 1.0 | 0.0 | 0.0 |
| 1 | ('St. Martin',) | 2020-03-10 | 2.0 | 0.0 | 0.0 |
| 2 | Afghanistan | 2020-02-24 | 1.0 | 0.0 | 0.0 |
| 3 | Afghanistan | 2020-02-25 | 1.0 | 0.0 | 0.0 |
| 4 | Afghanistan | 2020-02-26 | 1.0 | 0.0 | 0.0 |
| 5 | Afghanistan | 2020-02-27 | 1.0 | 0.0 | 0.0 |
| 6 | Afghanistan | 2020-02-28 | 1.0 | 0.0 | 0.0 |
| 7 | Afghanistan | 2020-02-29 | 1.0 | 0.0 | 0.0 |
| 8 | Afghanistan | 2020-03-01 | 1.0 | 0.0 | 0.0 |
| 9 | Afghanistan | 2020-03-02 | 1.0 | 0.0 | 0.0 |
| 10 | Afghanistan | 2020-03-03 | 2.0 | 0.0 | 0.0 |
| 11 | Afghanistan | 2020-03-04 | 4.0 | 0.0 | 0.0 |
| 12 | Afghanistan | 2020-03-05 | 4.0 | 0.0 | 0.0 |
| 13 | Afghanistan | 2020-03-06 | 4.0 | 0.0 | 0.0 |
| 14 | Afghanistan | 2020-03-07 | 4.0 | 0.0 | 0.0 |
| 15 | Afghanistan | 2020-03-08 | 5.0 | 0.0 | 0.0 |
| 16 | Afghanistan | 2020-03-09 | 7.0 | 0.0 | 0.0 |
| 17 | Afghanistan | 2020-03-10 | 8.0 | 0.0 | 0.0 |
| 18 | Afghanistan | 2020-03-11 | 11.0 | 0.0 | 0.0 |
| 19 | Afghanistan | 2020-03-12 | 12.0 | 0.0 | 0.0 |
# Distinct country names found in the aggregated table.
countries = pd.unique(df3['Country'])
# Displayed as the cell result: how many distinct countries there are.
countries.shape[0]
229
# Draw one scatter figure per country showing its three case counts over time.
for country in countries:
    # Rows for this country only; drop=True avoids carrying the old row
    # labels along as an unused 'index' column.
    per_country = df3[df3['Country'] == country].reset_index(drop=True)

    # The x axis is the row position within this country's slice, i.e. the
    # number of recorded days since its first row in df3.
    day_offsets = np.arange(len(per_country))

    # Each series is labelled with its own column name in the legend.
    for column, colour in (('Confirmed', 'blue'),
                           ('Recovered', 'green'),
                           ('Deaths', 'red')):
        plt.scatter(day_offsets, per_country[column], color=colour, label=column)

    plt.title(country)
    plt.xlabel('Days since the first suspect')
    plt.ylabel('Number of cases')
    plt.legend()
    plt.show()
# Worldwide totals: sum every country's counts for each date. Only the value
# columns are selected; the 'Date' group key is restored by reset_index(), and
# listing it in the selection as well makes it clash with that step.
df4 = df3.groupby(['Date'])[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
# Alias retained in case other cells refer to the plotted frame as `C`.
C = df4

# The x axis is the row position in df4, i.e. one step per recorded date.
day_offsets = np.arange(len(df4))

# Each series is labelled with its own column name in the legend.
for column, colour in (('Confirmed', 'blue'),
                       ('Recovered', 'green'),
                       ('Deaths', 'red')):
    plt.scatter(day_offsets, df4[column], color=colour, label=column)

plt.title('World')
plt.xlabel('Days since the first suspect')
plt.ylabel('Number of cases')
plt.legend()
plt.show()